function result = ClusteringMeasure_new(Y, predY)
addpath('Measures/');
if size(Y,2) ~= 1
Y = Y';
end;
if size(predY,2) ~= 1
predY = predY';
end;
n = length(Y);
uY = unique(Y);
nclass = length(uY);
Y0 = zeros(n,1);
if nclass ~= max(Y)
for i = 1:nclass
Y0(find(Y == uY(i))) = i;
end;
Y = Y0;
end
uY = unique(predY);
nclass = length(uY);
predY0 = zeros(n,1);
if nclass ~= max(predY)
for i = 1:nclass
predY0(find(predY == uY(i))) = i;
end;
predY = predY0;
end
Lidx = unique(Y); classnum = length(Lidx);
predLidx = unique(predY); pred_classnum = length(predLidx);
% purity
correnum = 0;
for ci = 1:pred_classnum
incluster = Y(find(predY == predLidx(ci)));
% cnub = unique(incluster);
% inclunub = 0;
% for cnubi = 1:length(cnub)
% inclunub(cnubi) = length(find(incluster == cnub(cnubi)));
% end;
inclunub = hist(incluster, 1:max(incluster)); if isempty(inclunub) inclunub=0;end;
correnum = correnum + max(inclunub);
end;
Purity = correnum/length(predY);
%if pred_classnum
res = bestMap(Y, predY);
% accuarcy
ACC = length(find(Y == res))/length(Y);
% NMI
MIhat = MutualInfo(Y,res);
%%
[F,P,R] = compute_f(Y,res);
%%
[AR,RI,MI,HI]=RandIndex(Y, res);
%%
result = [ACC  MIhat F  AR];
%%
function [newL2, c] = bestMap(L1,L2)
%bestmap: permute labels of L2 match L1 as good as possible
% [newL2] = bestMap(L1,L2);
%===========
L1 = L1(:);
L2 = L2(:);
if size(L1) ~= size(L2)
error('size(L1) must == size(L2)');
end
L1 = L1 - min(L1) + 1; % min (L1) <- 1;
L2 = L2 - min(L2) + 1; % min (L2) <- 1;
%=========== make bipartition graph ============
nClass = max(max(L1), max(L2));
G = zeros(nClass);
for i=1:nClass
for j=1:nClass
G(i,j) = length(find(L1 == i & L2 == j));
end
end
%=========== assign with hungarian method ======
[c,t] = hungarian(-G);
newL2 = zeros(nClass,1);
for i=1:nClass
newL2(L2 == i) = c(i);
end
%%
function MIhat = MutualInfo(L1,L2)
% mutual information
%===========
L1 = L1(:);
L2 = L2(:);
if size(L1) ~= size(L2)
error('size(L1) must == size(L2)');
end
L1 = L1 - min(L1) + 1; % min (L1) <- 1;
L2 = L2 - min(L2) + 1; % min (L2) <- 1;
%=========== make bipartition graph ============
nClass = max(max(L1), max(L2));
G = zeros(nClass);
for i=1:nClass
for j=1:nClass
G(i,j) = length(find(L1 == i & L2 == j))+eps;
end
end
sumG = sum(G(:));
%=========== calculate MIhat
P1 = sum(G,2); P1 = P1/sumG;
P2 = sum(G,1); P2 = P2/sumG;
H1 = sum(-P1.*log2(P1));
H2 = sum(-P2.*log2(P2));
P12 = G/sumG;
PPP = P12./repmat(P2,nClass,1)./repmat(P1,1,nClass);
PPP(abs(PPP) < 1e-12) = 1;
MI = sum(P12(:) .* log2(PPP(:)));
MIhat = MI / max(H1,H2);
%%%%%%%%%%%%% why complex ? %%%%%%%%
MIhat = real(MIhat);
%%
function [AR,RI,MI,HI]=RandIndex(c1,c2)
%RANDINDEX - calculates Rand Indices to compare two partitions
% ARI=RANDINDEX(c1,c2), where c1,c2 are vectors listing the 
% class membership, returns the "Hubert & Arabie adjusted Rand index".
% [AR,RI,MI,HI]=RANDINDEX(c1,c2) returns the adjusted Rand index, 
% the unadjusted Rand index, "Mirkin's" index and "Hubert's" index.
%
% See L. Hubert and P. Arabie (1985) "Comparing Partitions" Journal of 
% Classification 2:193-218

%(C) David Corney (2000)   		D.Corney@cs.ucl.ac.uk

if nargin < 2 | min(size(c1)) > 1 | min(size(c2)) > 1
   error('RandIndex: Requires two vector arguments')
   return
end

C=Contingency(c1,c2);	%form contingency matrix

n=sum(sum(C));
nis=sum(sum(C,2).^2);		%sum of squares of sums of rows
njs=sum(sum(C,1).^2);		%sum of squares of sums of columns

t1=nchoosek(n,2);		%total number of pairs of entities
t2=sum(sum(C.^2));	%sum over rows & columnns of nij^2
t3=.5*(nis+njs);

%Expected index (for adjustment)
nc=(n*(n^2+1)-(n+1)*nis-(n+1)*njs+2*(nis*njs)/n)/(2*(n-1));

A=t1+t2-t3;		%no. agreements
D=  -t2+t3;		%no. disagreements

if t1==nc
   AR=0;			%avoid division by zero; if k=1, define Rand = 0
else
   AR=(A-nc)/(t1-nc);		%adjusted Rand - Hubert & Arabie 1985
end

RI=A/t1;			%Rand 1971		%Probability of agreement
MI=D/t1;			%Mirkin 1970	%p(disagreement)
HI=(A-D)/t1;	%Hubert 1977	%p(agree)-p(disagree)
%%
function [C,T]=hungarian(A)
%HUNGARIAN Solve the Assignment problem using the Hungarian method.
%
%[C,T]=hungarian(A)
%A - a square cost matrix.
%C - the optimal assignment.
%T - the cost of the optimal assignment.
%s.t. T = trace(A(C,:)) is minimized over all possible assignments.
% Adapted from the FORTRAN IV code in Carpaneto and Toth, "Algorithm 548:
% Solution of the assignment problem [H]", ACM Transactions on
% Mathematical Software, 6(1):104-111, 1980.
% v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se.
% Department of Computing Science, Ume?University,
% Sweden.
% All standard disclaimers apply.
% A substantial effort was put into this code. If you use it for a
% publication or otherwise, please include an acknowledgement or at least
% notify me by email. /Niclas
[m,n]=size(A);
if (m~=n)
error('HUNGARIAN: Cost matrix must be square!');
end
% Save original cost matrix.
orig=A;
% Reduce matrix.
A=hminired(A);
% Do an initial assignment.
[A,C,U]=hminiass(A);
% Repeat while we have unassigned rows.
while (U(n+1))
% Start with no path, no unchecked zeros, and no unexplored rows.
LR=zeros(1,n);
LC=zeros(1,n);
CH=zeros(1,n);
RH=[zeros(1,n) -1];
% No labelled columns.
SLC=[];
% Start path in first unassigned row.
r=U(n+1);
% Mark row with end-of-path label.
LR(r)=-1;
% Insert row first in labelled row set.
SLR=r;
% Repeat until we manage to find an assignable zero.
while (1)
% If there are free zeros in row r
if (A(r,n+1)~=0)
% ...get column of first free zero.
l=-A(r,n+1);
% If there are more free zeros in row r and row r in not
% yet marked as unexplored..
if (A(r,l)~=0 & RH(r)==0)
% Insert row r first in unexplored list.
RH(r)=RH(n+1);
RH(n+1)=r;
% Mark in which column the next unexplored zero in this row
% is.
CH(r)=-A(r,l);
end
else
% If all rows are explored..
if (RH(n+1)<=0)
% Reduce matrix.
[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR);
end
% Re-start with first unexplored row.
r=RH(n+1);
% Get column of next free zero in row r.
l=CH(r);
% Advance "column of next free zero".
CH(r)=-A(r,l);
% If this zero is last in the list..
if (A(r,l)==0)
% ...remove row r from unexplored list.
RH(n+1)=RH(r);
RH(r)=0;
end
end
% While the column l is labelled, i.e. in path.
while (LC(l)~=0)
% If row r is explored..
if (RH(r)==0)
% If all rows are explored..
if (RH(n+1)<=0)
% Reduce cost matrix.
[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR);
end
% Re-start with first unexplored row.
r=RH(n+1);
end
% Get column of next free zero in row r.
l=CH(r);
% Advance "column of next free zero".
CH(r)=-A(r,l);
% If this zero is last in list..
if(A(r,l)==0)
% ...remove row r from unexplored list.
RH(n+1)=RH(r);
RH(r)=0;
end
end
% If the column found is unassigned..
if (C(l)==0)
% Flip all zeros along the path in LR,LC.
[A,C,U]=hmflip(A,C,LC,LR,U,l,r);
% ...and exit to continue with next unassigned row.
break;
else
% ...else add zero to path.
% Label column l with row r.
LC(l)=r;
% Add l to the set of labelled columns.
SLC=[SLC l];
% Continue with the row assigned to column l.
r=C(l);
% Label row r with column l.
LR(r)=l;
% Add r to the set of labelled rows.
SLR=[SLR r];
end
end
end
% Calculate the total cost.
T=sum(orig(logical(sparse(C,1:size(orig,2),1))));
function A=hminired(A)
%HMINIRED Initial reduction of cost matrix for the Hungarian method.
%
%B=assredin(A)
%A - the unreduced cost matris.
%B - the reduced cost matrix with linked zeros in each row.
% v1.0 96-06-13. Niclas Borlin, niclas@cs.umu.se.
[m,n]=size(A);
% Subtract column-minimum values from each column.
colMin=min(A);
A=A-colMin(ones(n,1),:);
% Subtract row-minimum values from each row.
rowMin=min(A')';
A=A-rowMin(:,ones(1,n));
% Get positions of all zeros.
[i,j]=find(A==0);
% Extend A to give room for row zero list header column.
A(1,n+1)=0;
for k=1:n
% Get all column in this row.
cols=j(k==i)';
% Insert pointers in matrix.
A(k,[n+1 cols])=[-cols 0];
end
function [A,C,U]=hminiass(A)
%HMINIASS Initial assignment of the Hungarian method.
%
%[B,C,U]=hminiass(A)
%A - the reduced cost matrix.
%B - the reduced cost matrix, with assigned zeros removed from lists.
%C - a vector. C(J)=I means row I is assigned to column J,
% i.e. there is an assigned zero in position I,J.
%U - a vector with a linked list of unassigned rows.
% v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se.
[n,np1]=size(A);
% Initalize return vectors.
C=zeros(1,n);
U=zeros(1,n+1);
% Initialize last/next zero "pointers".
LZ=zeros(1,n);
NZ=zeros(1,n);
for i=1:n
% Set j to first unassigned zero in row i.
lj=n+1;
j=-A(i,lj);
% Repeat until we have no more zeros (j==0) or we find a zero
% in an unassigned column (c(j)==0).
while (C(j)~=0)
% Advance lj and j in zero list.
lj=j;
j=-A(i,lj);
% Stop if we hit end of list.
if (j==0)
break;
end
end
if (j~=0)
% We found a zero in an unassigned column.
% Assign row i to column j.
C(j)=i;
% Remove A(i,j) from unassigned zero list.
A(i,lj)=A(i,j);
% Update next/last unassigned zero pointers.
NZ(i)=-A(i,j);
LZ(i)=lj;
% Indicate A(i,j) is an assigned zero.
A(i,j)=0;
else
% We found no zero in an unassigned column.
% Check all zeros in this row.
lj=n+1;
j=-A(i,lj);
% Check all zeros in this row for a suitable zero in another row.
while (j~=0)
% Check the in the row assigned to this column.
r=C(j);
% Pick up last/next pointers.
lm=LZ(r);
m=NZ(r);
% Check all unchecked zeros in free list of this row.
while (m~=0)
% Stop if we find an unassigned column.
if (C(m)==0)
break;
end
% Advance one step in list.
lm=m;
m=-A(r,lm);
end
if (m==0)
% We failed on row r. Continue with next zero on row i.
lj=j;
j=-A(i,lj);
else
% We found a zero in an unassigned column.
% Replace zero at (r,m) in unassigned list with zero at (r,j)
A(r,lm)=-j;
A(r,j)=A(r,m);
% Update last/next pointers in row r.
NZ(r)=-A(r,m);
LZ(r)=j;
% Mark A(r,m) as an assigned zero in the matrix . . .
A(r,m)=0;
% ...and in the assignment vector.
C(m)=r;
% Remove A(i,j) from unassigned list.
A(i,lj)=A(i,j);
% Update last/next pointers in row r.
NZ(i)=-A(i,j);
LZ(i)=lj;
% Mark A(r,m) as an assigned zero in the matrix . . .
A(i,j)=0;
% ...and in the assignment vector.
C(j)=i;
% Stop search.
break;
end
end
end
end
% Create vector with list of unassigned rows.
% Mark all rows have assignment.
r=zeros(1,n);
rows=C(C~=0);
r(rows)=rows;
empty=find(r==0);
% Create vector with linked list of unassigned rows.
U=zeros(1,n+1);
U([n+1 empty])=[empty 0];
function [A,C,U]=hmflip(A,C,LC,LR,U,l,r)
%HMFLIP Flip assignment state of all zeros along a path.
%
%[A,C,U]=hmflip(A,C,LC,LR,U,l,r)
%Input:
%A - the cost matrix.
%C - the assignment vector.
%LC - the column label vector.
%LR - the row label vector.
%U - the
%r,l - position of last zero in path.
%Output:
%A - updated cost matrix.
%C - updated assignment vector.
%U - updated unassigned row list vector.
% v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se.
n=size(A,1);
while (1)
% Move assignment in column l to row r.
C(l)=r;
% Find zero to be removed from zero list..
% Find zero before this.
m=find(A(r,:)==-l);
% Link past this zero.
A(r,m)=A(r,l);
A(r,l)=0;
% If this was the first zero of the path..
if (LR(r)<0)
...remove row from unassigned row list and return.
U(n+1)=U(r);
U(r)=0;
return;
else
% Move back in this row along the path and get column of next zero.
l=LR(r);
% Insert zero at (r,l) first in zero list.
A(r,l)=A(r,n+1);
A(r,n+1)=-l;
% Continue back along the column to get row of next zero in path.
r=LC(l);
end
end
function [A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR)
%HMREDUCE Reduce parts of cost matrix in the Hungerian method.
%
%[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR)
%Input:
%A - Cost matrix.
%CH - vector of column of 'next zeros' in each row.
%RH - vector with list of unexplored rows.
%LC - column labels.
%RC - row labels.
%SLC - set of column labels.
%SLR - set of row labels.
%
%Output:
%A - Reduced cost matrix.
%CH - Updated vector of 'next zeros' in each row.
%RH - Updated vector of unexplored rows.
% v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se.
n=size(A,1);
% Find which rows are covered, i.e. unlabelled.
coveredRows=LR==0;
% Find which columns are covered, i.e. labelled.
coveredCols=LC~=0;
r=find(~coveredRows);
c=find(~coveredCols);
% Get minimum of uncovered elements.
m=min(min(A(r,c)));
% Subtract minimum from all uncovered elements.
A(r,c)=A(r,c)-m;
% Check all uncovered columns..
for j=c
% ...and uncovered rows in path order..
for i=SLR
% If this is a (new) zero..
if (A(i,j)==0)
% If the row is not in unexplored list..
if (RH(i)==0)
% ...insert it first in unexplored list.
RH(i)=RH(n+1);
RH(n+1)=i;
% Mark this zero as "next free" in this row.
CH(i)=j;
end
% Find last unassigned zero on row I.
row=A(i,:);
colsInList=-row(row<0);
if (length(colsInList)==0)
% No zeros in the list.
l=n+1;
else
l=colsInList(row(colsInList)==0);
end
% Append this zero to end of list.
A(i,l)=-j;
end
end
end
% Add minimum to all doubly covered elements.
r=find(coveredRows);
c=find(coveredCols);
% Take care of the zeros we will remove.
[i,j]=find(A(r,c)<=0);
i=r(i);
j=c(j);
for k=1:length(i)
% Find zero before this in this row.
lj=find(A(i(k),:)==-j(k));
% Link past it.
A(i(k),lj)=A(i(k),j(k));
% Mark it as assigned.
A(i(k),j(k))=0;
end
A(r,c)=A(r,c)+m;